library(censusapi)
library(tidycensus)
library(tidyverse)
library(sf)
library(geojsonsf)
library(mapview)
library(dplyr)
library(plotly)
library(tigris)
library(readxl)
library(leaflet)
library(RColorBrewer)
library(sp)
library(usmap)
mapviewOptions(
basemaps = "CartoDB.Positron",
#vector.palette = colorRampPalette(cols)
)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="0c313bd613a7281ae62c2fbb004d156d647e9c94")
projection <- "+proj=utm +zone=10 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=ft +no_defs"
# Obtain the census variable lookup data
# acs_vars <-
# listCensusMetadata(
# name = "2018/acs/acs5",
# type = "variables"
# )
# Save an .rds version
# saveRDS(acs_vars, file = "/Users/armellecoutant/Documents/GitHub/218z/Armelle_Coutant/sj_outreach/censusData2018_acs_acs5.rds")
# Obtain the saved census data
acs_vars = readRDS("/Users/armellecoutant/Documents/GitHub/218z/Armelle_Coutant/sj_outreach/censusData2018_acs_acs5.rds")
# Get Santa Clara block groups
sc_blockgroups <-
block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>%
st_transform('+proj=longlat +datum=WGS84')
# Get San Jose geometry
sj_geom <- places("CA", cb = F, progress_bar = FALSE) %>%
filter(NAME == "San Jose") %>%
st_transform('+proj=longlat +datum=WGS84')
# Filter to Santa Clara block groups within San Jose geometry
sj_blockgroups <- sc_blockgroups[st_contains(sj_geom, sc_blockgroups %>% st_centroid())[[1]],]
These vulnerability maps are intended to determine the San Jose block groups with limited access to information and resources. The following factors were selected for the layered map: - Households without Internet Access - Households without Computers - Households Below 80% AMI - Households with Limited English Proficiency
sc_internet <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B28002)"
) %>%
mutate(
blockgroup = paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
-blockgroup
) %>%
mutate(
label = acs_vars$label[match(variable,acs_vars$name)]
) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "subscription", "type", "additional"), sep = "!!") %>%
filter(is.na(subscription) | subscription == "No Internet access") %>%
mutate(
subscription = replace_na(subscription, "total_num")
) %>%
dplyr::select(blockgroup, estimate, subscription) %>%
spread(key = subscription, value = estimate)
# mutate(
# percent_no_internet = (`No Internet access`*100 / total_num) %>% round(1)
# )
sj_blockgroups_internet <-
sc_internet %>%
filter(blockgroup %in% sj_blockgroups$GEOID) %>%
left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>%
st_as_sf() %>%
st_set_crs(4326) %>%
st_transform(projection)
total_no_internet_sj = sum(sj_blockgroups_internet$`No Internet access`)
total_sj <- sum(sj_blockgroups_internet$total_num)
#perc_no_internet_sj <- total_no_internet_sj*100/total_sj
This is the total number of households in San Jose without internet:
print(total_no_internet_sj)
## [1] 24834
Out of a total number of households:
print(total_sj)
## [1] 308389
mapview(sj_blockgroups_internet %>% dplyr::select('No Internet access'), layer.name = "Households without<br>Internet Access")
sc_computer <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B28003)"
) %>%
mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "computer", "internet"), sep = "!!") %>%
filter(is.na(computer) | computer == "No computer") %>%
mutate(computer = replace_na(computer, "total_num")) %>%
dplyr::select(blockgroup, estimate, computer) %>%
spread(key = computer, value = estimate)
#mutate(percent_no_computer = (`No computer`*100 / total_num) %>% round(1))
sj_blockgroups_computer <-
sc_computer %>%
filter(blockgroup %in% sj_blockgroups$GEOID) %>%
left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>%
st_as_sf() %>%
st_set_crs(4326) %>%
st_transform(projection)
total_no_computer_sj = sum(sj_blockgroups_computer$`No computer`)
total_sj_comp <- sum(sj_blockgroups_computer$total_num)
#perc_no_computer_sj <- total_no_computer_sj*100/total_sj_comp
This is the total number of households in San Jose without a computer:
print(total_no_computer_sj)
## [1] 18010
Out of a total number of households:
print(total_sj)
## [1] 308389
mapview(sj_blockgroups_computer %>% dplyr::select('No computer'), layer.name = "Households without<br>a Computer")
Using area median income (AMI) is a more appropriate way to measure low-income status, as the cost of living is substantially higher than the federal average in San Jose.
sj_median_income_by_block <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "B19013_001E"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
rename(
Median_Income = B19013_001E
) %>%
filter(!is.na(Median_Income)) %>%
left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>%
na.omit()
sj_ami_by_block <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B19001)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
group_by(blockgroup) %>%
summarize(
Total = B19001_001E,
`Under 75,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E),
#sum(lapply(2:12, function(x) as.name(paste0("B19001_00",x,"E"))))
`Under 100,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E)
) %>%
mutate(
`% under 75,000` = `Under 75,000` / Total * 100,
`% under 100,000` = `Under 100,000` / Total * 100
) %>%
left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)
)
sj_ami_by_block <-
sj_ami_by_block %>%
st_as_sf()
mapview(sj_ami_by_block, zcol = "Under 100,000", layer.name = "Households under<br>80% AMI")
sj_lang_by_block <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B16004)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
- blockgroup
) %>%
left_join(acs_vars, by = c("variable" = "name")) %>%
mutate(
tier = substr(label,lapply(label, function(x) max(unlist(gregexpr('!!',x)))+2),nchar(label))
) %>%
filter(tier %in% c('Speak English "not well"',
'Speak English "not at all"',
'Total', 'Speak Spanish',
'Speak Asian and Pacific Island languages')) %>%
group_by(blockgroup, tier) %>%
summarise(
estimate1 = sum(estimate)
) %>%
spread(
key = "tier",
value = "estimate1"
) %>%
mutate(
`% speaking english < well` = (`Speak English "not well"` + `Speak English "not at all"`) / Total * 100,
`% speaking spanish` = (`Speak Spanish`/ Total) * 100,
`% speaking api` = (`Speak Asian and Pacific Island languages` / Total) * 100,
`Low Proficiency` = sum(`Speak English "not well"` + `Speak English "not at all"`)
) %>%
left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)) %>%
mutate(
log_perc = log(`% speaking english < well`)
) %>%
filter(log_perc > 0) %>%
st_as_sf()
mapview(sj_lang_by_block, zcol = "Low Proficiency", layer.name = "Households with<br>Low English Proficiency")
pal_1 <-
colorNumeric(
palette = "Blues",
domain =
c(0,sj_blockgroups_internet %>%
pull(`No Internet access`) %>%
unique())
)
pal_2 <-
colorNumeric(
palette = "Blues",
domain =
c(0,sj_blockgroups_computer %>%
pull(`No computer`) %>%
unique())
)
pal_3 <-
colorNumeric(
palette = "Blues",
domain =
c(0,sj_ami_by_block %>%
pull(`Under 100,000`) %>%
unique())
)
pal_4 <-
colorNumeric(
palette = "Blues",
domain =
c(0,sj_lang_by_block %>%
pull(`Low Proficiency`) %>%
unique())
)
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = sj_blockgroups_internet %>%
st_transform(4326),
fill = TRUE,
fillColor = ~pal_1(`No Internet access`),
color = "white",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.5,
group = "No Internet",
label = sj_blockgroups_internet$`No Internet access`
) %>%
addPolygons(
data = sj_blockgroups_computer %>%
st_transform(4326),
fill = TRUE,
fillColor = ~pal_2(`No computer`),
color = "white",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.5,
group = "No Computer",
label = sj_blockgroups_computer$`No computer`
) %>%
addPolygons(
data = sj_ami_by_block %>%
st_transform(4326),
fill = TRUE,
fillColor = ~pal_3(`Under 100,000`),
color = "white",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.5,
group = "Under 80% AMI",
label = sj_ami_by_block$`Under 100,000`
) %>%
addPolygons(
data = sj_lang_by_block %>%
st_transform(4326),
fill = TRUE,
fillColor = ~pal_4(`Low Proficiency`),
color = "white",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.5,
group = "Low English Proficiency",
label = sj_lang_by_block$`Low Proficiency`
) %>%
addLayersControl(
overlayGroups = c("No Internet", "No Computer", "Under 80% AMI", "Low English Proficiency")
)
# addLegend(
# data = sj_blockgroups_internet,
# colors = pal,
# values = ~`No Internet access`,
# title = paste0("No Internet<br>Access<br>"),
# opacity = 0.5
# )
# addPopups()
This segment explores different outreach options to target the most vulnerable block groups identified, starting with exploring the language breakdown of different block groups. The first outreach option is placing informational posters in highly visited essential businesses. The second outreach option is placing the posters on street poles. The final outreach option is placing the posters on bus routes.
A block with high % of no internet access:
example <-
sj_blockgroups_internet[29,]
mapview(example)
# This creates file paths for the SafeGraph folder and COVID-19 GitHub folder.
sg_path <- "/Users/armellecoutant/pCloud Drive/SFBI/Restricted Data Library/Safegraph/"
github_path <- "/Users/armellecoutant/Documents/GitHub/covid19/"
# This sets URLs for Point of Interest (POI) location data.
poi_url_Mar2020 <-
paste0(sg_path, "/core/2020/03/CoreRecords-CORE_POI-2019_03-2020-03-25/ca_poi_Mar20.rds")
poi_url_Apr2020 <-
paste0(sg_path, "/core/2020/04/CoreApr2020Release-CORE_POI-2020_03-2020-04-07/ca_poi_Apr20.rds")
poi_url_2019 <-
paste0(sg_path,'covid19analysis/ca_poi.rds')
Can be associated with grocery store locations to determine which grocery stores to target for outreach.
poi_caApr20 <- readRDS (poi_url_Apr2020)
poi_sj <-
poi_caApr20 %>%
filter(city == "San Jose") %>%
filter(naics_code == "445110")
coordinates(poi_sj) <- c("longitude", "latitude")
proj4string(poi_sj) <- CRS("+init=epsg:4326")
mapview(poi_sj)
This script includes code adapted from the following sources: - simone_nfo_internet.Rmd - Dem_analysis_plotly.Rmd